from sklearn import preprocessing
import numpy as np
import pandas as pd

# Locations of the raw training inputs and of the files this script produces.
INPUT_FEATURE_PATH = "../data/train_feature.csv"
INPUT_LABEL_PATH = "../data/train_label.csv"
OUTPUT_FEATURE_PROCESSED = "../data/train_feature_processed.csv"
OUTPUT_PATH = "../data/train_processed1.csv"

train_feature = pd.read_csv(INPUT_FEATURE_PATH)
train_label = pd.read_csv(INPUT_LABEL_PATH)

# Min-max rescale the six measurement columns in one vectorised step:
# (x - min) / (max - min), evaluated independently for every column.
measurements = train_feature[['辐照度', '风速', '风向', '温度', '湿度', '气压']]
column_min = measurements.min()
column_range = measurements.max() - column_min

# The "日期" and "时刻" columns are carried over unchanged and placed ahead
# of the rescaled measurements.
cal = pd.concat(
    [train_feature[["日期", "时刻"]], (measurements - column_min) / column_range],
    axis=1,
)
# With default arguments to_csv also writes the DataFrame index as the
# leading column of the file.
cal.to_csv(OUTPUT_FEATURE_PROCESSED)

# Feature rows: re-read the normalised feature CSV as raw strings, dropping
# the header line and the leading index column written by DataFrame.to_csv.
data_f = []
# pandas writes UTF-8 by default, so read the file back as UTF-8 instead of
# the platform default encoding; otherwise the non-ASCII header may fail to
# decode on systems whose locale is not UTF-8.
with open(OUTPUT_FEATURE_PROCESSED, encoding="utf-8") as f:
    next(f, None)  # skip the header line (no-op on an empty file)
    for line in f:
        line = line.strip()
        if not line:
            continue  # a blank line would otherwise become an empty row
        data_f.append(line.split(',')[1:])

# Label rows: every non-blank line after the header, split on commas.
data_l = []
# The same file was already parsed by pd.read_csv with its default UTF-8
# decoding, so UTF-8 is the matching choice here as well.
with open(INPUT_LABEL_PATH, encoding="utf-8") as f:
    next(f, None)  # skip the header line
    for line in f:
        line = line.strip()
        if not line:
            continue  # tolerate a trailing blank line
        data_l.append(line.split(','))

# Merge features and labels per day.
# data maps the day key (column 0 of a feature row, kept as a string) to
# [day, f1, ..., fN, label]: the six measurements of every row sharing that
# day are concatenated in file order and the label is appended last.
data = {}
for data_item in data_f:
    day = data_item[0]
    if day not in data:
        data[day] = [float(day)]
    # Columns 2..7 hold the measurements; column 1 ("时刻") is not used.
    for col in range(2, 8):
        data[day].append(float(data_item[col]))

# Remember which days actually received a label. Without this, a day that
# is missing from the label file would have its last feature value written
# out as if it were the label.
labelled_days = set()
for data_item in data_l:
    # A label whose day has no feature rows raises KeyError here.
    data[data_item[0]].append(float(data_item[1]))
    labelled_days.add(data_item[0])

# Write one line per labelled day: the label first, then the features
# (the leading day value is not written).
item = None
input_item = None
skipped = 0
with open(OUTPUT_PATH, 'w') as fout:
    for day, values in data.items():
        if day not in labelled_days:
            skipped += 1
            continue
        input_item = [str(values[-1])] + [str(value) for value in values[1:-1]]
        fout.write("%s\n" % (",".join(input_item)))
        item = day

if skipped:
    print("skipped %d day(s) without a label" % skipped)
# Echo the last written record; guarded so empty input does not raise.
if item is not None:
    print(data[item])
    print(input_item)
print("completed")

# Re-point the path constants at the test set; the steps below mirror the
# training-feature normalisation.
INPUT_FEATURE_PATH = "../data/test_feature.csv"
OUTPUT_FEATURE_PROCESSED = "../data/test_feature_processed.csv"
OUTPUT_PATH = "../data/test_processed1.csv"


# The name train_feature is reused here, but it now holds the test features.
train_feature = pd.read_csv(INPUT_FEATURE_PATH)

# Min-max rescale the six measurement columns in one vectorised step:
# (x - min) / (max - min), evaluated independently for every column.
# NOTE(review): the minimum and maximum come from the test file itself, not
# from the training data - confirm that this is intended.
measurements = train_feature[['辐照度', '风速', '风向', '温度', '湿度', '气压']]
column_min = measurements.min()
column_range = measurements.max() - column_min

# The "日期" and "时刻" columns are carried over unchanged and placed ahead
# of the rescaled measurements.
cal = pd.concat(
    [train_feature[["日期", "时刻"]], (measurements - column_min) / column_range],
    axis=1,
)
# With default arguments to_csv also writes the DataFrame index as the
# leading column of the file.
cal.to_csv(OUTPUT_FEATURE_PROCESSED)


# Feature rows: re-read the normalised test-feature CSV as raw strings,
# dropping the header line and the leading index column written by
# DataFrame.to_csv.
data_f = []
# pandas writes UTF-8 by default, so read the file back as UTF-8 instead of
# the platform default encoding.
with open(OUTPUT_FEATURE_PROCESSED, encoding="utf-8") as f:
    next(f, None)  # skip the header line (no-op on an empty file)
    for line in f:
        line = line.strip()
        if not line:
            continue  # a blank line would otherwise become an empty row
        data_f.append(line.split(',')[1:])

# Standardise only the six measurement columns (positions 2..7 of a row).
# Scaling whole rows would also standardise column 0, which is used below as
# the per-day grouping key and written out as the day value, and column 1
# ("时刻"), which is not a measurement.
# NOTE(review): the training rows are not passed through preprocessing.scale
# anywhere in this script - confirm that the test features should be.
if data_f:
    scaled = preprocessing.scale(
        [[float(value) for value in row[2:8]] for row in data_f])
    # Re-attach the untouched key columns in front of the scaled values.
    data_f = [row[:2] + list(values) for row, values in zip(data_f, scaled)]
# Merge: group the feature rows by day.
# data maps the day key (column 0 of a row) to [day, f1, ..., fN]: the six
# measurements of every row sharing that day are concatenated in row order.
data = {}
for data_item in data_f:
    day = data_item[0]
    if day not in data:
        data[day] = [float(day)]
    # Columns 2..7 hold the measurements; column 1 ("时刻") is not used.
    for col in range(2, 8):
        data[day].append(float(data_item[col]))
# Report how many values the day of the last row holds; guarded so that
# empty input does not raise NameError.
if data:
    print(len(data[day]))

# Write one line per day: the day value followed by its features. The
# context manager closes the file even if a write fails.
item = None
data_input = None
with open(OUTPUT_PATH, 'w') as fout:
    for item, values in data.items():
        data_input = [str(value) for value in values]
        fout.write("%s\n" % (",".join(data_input)))

# Echo the last written record.
if data:
    print(data[item])
    print(data_input)
print("completed")
